Guided project: analysing US birth data from the CDC/NCHS dataset covering 1994–2003.
In [1]:
# Read the raw CSV text and split it into lines. The context manager closes
# the file handle as soon as the read finishes instead of leaking it.
with open("US_births_1994-2003_CDC_NCHS.csv") as csv_file:
    csv_list = csv_file.read().split("\n")
In [2]:
# Preview the first ten raw (still unparsed) lines of the file.
csv_list[0:10]
Out[2]:
In [3]:
def read_csv(filename):
    """Load a headered, all-integer CSV file into a list of integer rows.

    The first line of the file is treated as a header and discarded. Every
    remaining non-blank line is split on commas and each field is converted
    with ``int``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per data line; each entry is a list of ints in
        column order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field on a non-blank line is not a valid integer.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename) as csv_file:
        lines = csv_file.read().split("\n")

    final_list = []
    for line in lines[1:]:  # lines[0] is the header row
        # A newline at the end of the file leaves an empty last element after
        # the split; int("") would raise ValueError, so blank lines are skipped.
        if not line.strip():
            continue
        final_list.append([int(value) for value in line.split(",")])
    return final_list
# Parse the whole dataset into a list of integer rows (header dropped by read_csv).
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
In [4]:
# Preview the first ten parsed rows to check the integer conversion.
cdc_list[0:10]
Out[4]:
In [5]:
def month_births(data):
    """Total the births recorded for each month.

    Args:
        data: Iterable of rows; index 1 of each row is used as the month key
            and index 4 as that row's birth count.

    Returns:
        A dict mapping each month value to the sum of the birth counts of
        all rows carrying that month.
    """
    totals = {}
    for record in data:
        month_key, count = record[1], record[4]
        if month_key not in totals:
            # First row seen for this month: start from its count.
            totals[month_key] = count
            continue
        totals[month_key] = totals[month_key] + count
    return totals
# Births summed per month across the full dataset.
cdc_month_births = month_births(cdc_list)
In [6]:
# Show the month -> total births mapping.
cdc_month_births
Out[6]:
In [8]:
def dow_births(data):
    """Total the births recorded for each day of the week.

    Args:
        data: Iterable of rows; index 3 of each row is used as the
            day-of-week key and index 4 as that row's birth count.

    Returns:
        A dict mapping each day-of-week value to the sum of the birth counts
        of all rows carrying that value.
    """
    totals = {}
    for record in data:
        weekday = record[3]
        count = record[4]
        # Extend the running total when the key exists, otherwise seed it.
        totals[weekday] = (totals[weekday] + count) if weekday in totals else count
    return totals
# Births summed per day of the week across the full dataset.
cdc_day_births = dow_births(cdc_list)
In [10]:
# Show the day-of-week -> total births mapping.
cdc_day_births
Out[10]:
In [11]:
def calc_counts(data, column, births_column=4):
    """Sum a numeric column grouped by the distinct values of another column.

    Args:
        data: Iterable of rows (indexable sequences of numbers).
        column: Index of the column whose distinct values become the keys
            of the result.
        births_column: Index of the column that is summed for each key.
            Defaults to 4, the index this notebook uses for birth counts,
            so existing two-argument calls behave as before.

    Returns:
        A dict mapping each distinct value found in ``column`` to the sum of
        ``births_column`` over the rows that carry that value. Empty input
        yields an empty dict.

    Raises:
        IndexError: If a row is too short for either column index.
    """
    sum_dict = {}
    for row in data:
        col_value = row[column]
        # dict.get supplies 0 for a key seen for the first time, which
        # replaces the explicit membership test.
        sum_dict[col_value] = sum_dict.get(col_value, 0) + row[births_column]
    return sum_dict
# Aggregate total births by each date component of a row. Column meanings
# follow the target variable names: 0 year, 1 month, 2 day of month,
# 3 day of week.
cdc_year_births = calc_counts(data=cdc_list, column=0)
# Rebinds the name assigned earlier from month_births(); both sum row[4]
# grouped by row[1].
cdc_month_births = calc_counts(data=cdc_list, column=1)
cdc_dom_births = calc_counts(data=cdc_list, column=2)
cdc_dow_births = calc_counts(data=cdc_list, column=3)
In [12]:
# Show the year -> total births mapping.
cdc_year_births
Out[12]:
In [13]:
# Show the month totals as recomputed by calc_counts; these should match the
# month_births() result displayed earlier.
cdc_month_births
Out[13]:
In [14]:
# Show the day-of-month -> total births mapping.
cdc_dom_births
Out[14]:
In [ ]: